CRIM 515 Final Project In-Class

12 December 2024

Your Name

Some R Markdown styles to try, if you’re interested:

“almond”, “awsm.css”, “axist”, “bamboo”, “bullframe”, “holiday”, “kacit”, “latex.css”, “markdown-splendor”, “markdown-retro”, “markdown-air”, “markdown-modest”, “marx”, “minicss”, “new.css”, “no-class”, “picocss”, “sakura”, “sakura-vader”, “semantic”, “simplecss”, “style-sans”, “style-serif”, “stylize”, “superstylin”, “tacit”, “vanilla”, “water”, “water-dark”, “writ”

1. Research Question

EXAMPLE: Using 2008-2024 data, this project forecasts animal-related calls for service counts and locations in the City of Fairfax, VA for each month in 2025.

2. Literature Review

INSERT THAT TEXT, WITH CITATION, HERE.

3. Data

Libraries

library(tidyverse)
library(leaflet)
library(sf)
library(tigris)
library(zoo)
library(aTSA)
library(tseries)
library(forecast)

Get the latest Fairfax calls for service data:

calls.full <- read.csv(sprintf("https://docs.google.com/uc?id=%s&export=download",
                               "1ti7BCMe1vvDVVr75p6lqvgouCy_ae7rn"))

Format the date and add month columns:

calls.full$DATE <- as.Date(calls.full$date, format = "%Y-%m-%d")
calls.full$MONTHS <- months(calls.full$DATE)
calls.full$MONTH <- month(calls.full$DATE)

Identify a set of calls to analyze, and then filter the data:

group <- c("ANIMAL BITE", "ANIMAL CASE", "ANIMAL CONTROL", "ANIMAL LOST-FOUND")
calls.subset <- calls.full %>% filter(calls.full$type %in% group)

Calculate calls per day:

calls <- calls.subset %>%
  group_by(DATE) %>%
  summarise(CALL.COUNT = n())

Fill in blank days:

calls <- calls %>%
  complete(DATE = seq.Date(min(DATE), max(DATE), by = "day")) %>%
  mutate(WEEKDAY = lubridate::wday(DATE, label = T, week_start = 1), 
         MONTH = lubridate::month(DATE, label = T, abbr = F),
         WEEK = isoweek(DATE),
         DAY = day(DATE),
         YEAR = year(DATE))
calls <- replace(calls, is.na(calls), 0)

Make the data a time series:

cleaned.calls <- zoo(calls$CALL.COUNT, 
                       seq(from = as.Date(min(calls$DATE)), 
                           to = as.Date(max(calls$DATE)), by = 1))

Generate basic stats and a basic graph:

summary(cleaned.calls)
##      Index            cleaned.calls   
##  Min.   :2007-01-03   Min.   : 0.000  
##  1st Qu.:2011-06-18   1st Qu.: 1.000  
##  Median :2015-12-02   Median : 1.000  
##  Mean   :2015-12-02   Mean   : 1.655  
##  3rd Qu.:2020-05-17   3rd Qu.: 2.000  
##  Max.   :2024-10-31   Max.   :11.000
plot(cleaned.calls)
title("Animal Calls Per Day")

Make the data stationary:

stationary1 <- diff(cleaned.calls, differences = 1)

Plot each one:

plot(stationary1)

Run ADF tests on the original data, and the stationary datasets:

adf.test(as.matrix(stationary1))
## Warning in adf.test(as.matrix(stationary1)): p-value smaller than printed
## p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  as.matrix(stationary1)
## Dickey-Fuller = -30.635, Lag order = 18, p-value = 0.01
## alternative hypothesis: stationary

Stationary1 is the winner.

Determine the p value using PACF:

pacf(stationary1)

pacf(stationary1, pl=FALSE)
## 
## Partial autocorrelations of series 'stationary1', by lag
## 
##      1      2      3      4      5      6      7      8      9     10     11 
## -0.490 -0.325 -0.234 -0.208 -0.168 -0.147 -0.126 -0.100 -0.088 -0.086 -0.051 
##     12     13     14     15     16     17     18     19     20     21     22 
## -0.068 -0.099 -0.060 -0.044 -0.033 -0.056 -0.028 -0.044 -0.082 -0.026 -0.024 
##     23     24     25     26     27     28     29     30     31     32     33 
## -0.040 -0.056 -0.031 -0.042 -0.069 -0.018 -0.013 -0.019 -0.003 -0.034  0.001 
##     34     35     36     37     38 
## -0.032 -0.039 -0.004 -0.009 -0.021

The p value for this example will be 13.

Determine the q value using ACF:

acf(stationary1)

acf(stationary1, pl=FALSE)
## 
## Autocorrelations of series 'stationary1', by lag
## 
##      0      1      2      3      4      5      6      7      8      9     10 
##  1.000 -0.490 -0.007  0.005 -0.020  0.014 -0.005  0.003  0.006 -0.005 -0.005 
##     11     12     13     14     15     16     17     18     19     20     21 
##  0.018 -0.026 -0.010  0.036 -0.009  0.000 -0.018  0.024 -0.020 -0.014  0.048 
##     22     23     24     25     26     27     28     29     30     31     32 
## -0.024 -0.010 -0.002  0.020 -0.017 -0.011  0.041 -0.020 -0.007  0.010 -0.022 
##     33     34     35     36     37     38 
##  0.031 -0.034  0.014  0.024 -0.023 -0.005

The q value for this example will be 1.

Build the ARIMA model:

arima.calls <- auto.arima(cleaned.calls, d = 1, max.p = 13, max.q = 1, seasonal = T)
summary(arima.calls)
## Series: cleaned.calls 
## ARIMA(1,1,1) 
## 
## Coefficients:
##          ar1      ma1
##       0.0200  -0.9624
## s.e.  0.0129   0.0034
## 
## sigma^2 = 2.042:  log likelihood = -11563.73
## AIC=23133.45   AICc=23133.46   BIC=23153.8
## 
## Training set error measures:
##                        ME     RMSE      MAE  MPE MAPE      MASE         ACF1
## Training set -0.001690847 1.428774 1.119778 -Inf  Inf 0.7610518 0.0002094817

Check the model residuals (probably not worth keeping in your Final):

Identify our forecast window:

forecast.window <- as.numeric(as.Date("2025-12-31")-max(calls$DATE))

Forecast 2025

forecast.2025 <- forecast(arima.calls, h=forecast.window)
autoplot(forecast.2025)

Make a forecast table:

forecast.values <- as.data.frame(forecast.2025$mean)
forecast.values$ID <- seq.int(nrow(forecast.values))

forecast.upper <- as.data.frame(forecast.2025$upper)
forecast.upper$ID <- seq.int(nrow(forecast.upper))

forecast.values <- forecast.values %>%
  left_join(forecast.upper, by = 'ID')

colnames(forecast.values) <- c("MEAN", "ID", "CI80", "CI95")
forecast.values$DATE <- as.Date(max(calls$DATE) + forecast.values$ID)
forecast.values$MONTH <- months(forecast.values$DATE)

Filter to 2025, summarize by month:

forecast.values.2025 <- subset(forecast.values, forecast.values$DATE > '2024-12-31')

forecast.months <- forecast.values.2025 %>%
  group_by(MONTH) %>%
  summarise(MEAN = round(sum(MEAN),0), FORECAST.95 = round(sum(CI95),0), FORECAST.80 = round(sum(CI80),0))

forecast.months$DIFF <- forecast.months$FORECAST.95 - forecast.months$FORECAST.80

Graphs

  1. Calls per day with a trend line:
graph.calls <- ggplot(calls, aes(x=DATE, y=CALL.COUNT)) + 
  geom_line() + 
  scale_x_date(date_breaks = "1 year", date_labels = "%Y") + 
  xlab("Years") + 
  ylab("Call Count") + 
  ggtitle("TITLE HERE") + 
  geom_area(fill="lightblue", color="black")

graph.calls + 
  geom_smooth(method = lm, col = "red", se = FALSE) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
## `geom_smooth()` using formula = 'y ~ x'

  1. Calls per day with a trend line and the forecast:
calls2024 <- calls[c(1,2)]
calls2025 <- forecast.values[c(5,1)]
names(calls2025) <- c("DATE", "CALL.COUNT")

new.calls <- rbind(calls2024, calls2025)

graph.new.calls <- ggplot(new.calls, aes(x=DATE, y=CALL.COUNT)) + 
  geom_line() + 
  scale_x_date(date_breaks = "1 year", date_labels = "%Y") + 
  xlab("Years") + 
  ylab("Call Count") + 
  ggtitle("TITLE HERE") + 
  geom_area(fill="lightblue", color="black")

graph.new.calls + 
  geom_smooth(method = lm, col = "red", se = FALSE) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
## `geom_smooth()` using formula = 'y ~ x'

  1. Monthly upper bounds graph:
forecast.months$MONTH <- factor(forecast.months$MONTH, levels = forecast.months$MONTH)
forecast.long <- pivot_longer(forecast.months, cols = c(FORECAST.80, DIFF), 
                          names_to = "Category", values_to = "Value")
forecast.long$Category <- gsub("DIFF", "95% Confidence Interval", forecast.long$Category)
forecast.long$Category <- gsub("FORECAST.80", "80% Confidence Interval", forecast.long$Category)

ggplot(forecast.long, aes(x = MONTH, y = Value, fill = fct_rev(Category))) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = Value), size = 3, colour = 'white', position = position_stack(vjust = 0.5)) + 
  labs(title = "City of Fairfax 2025 Monthly Forecast Upper Bounds", 
       x = "Month", 
       y = "Call Count") +
  scale_fill_manual(values = c("95% Confidence Interval" = "gold",
                               "80% Confidence Interval" = "maroon"),
                    name = "Forecasts") +
  scale_x_discrete(limits = c("January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December")) + 
  coord_cartesian(ylim = c(0, 175)) +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

  1. Monthly forecasts by mean predicted value:
ggplot(forecast.months, aes(x = MONTH, y = MEAN)) +
  geom_bar(stat = "identity") +
  scale_x_discrete(limits = c("January", "February", "March", "April", "May", "June", "July", "August",
                              "September", "October", "November", "December")) +
  coord_cartesian(ylim = c(0, 50)) +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(title = "City of Fairfax 2025 Mean Monthly Forecast", 
       x = "Month", 
       y = "Call Count")

Personally, I would use graph #2 AND either graph #3 or graph #4 (so two graphs total).

Maps

Get the geographic data:

fairfax.roads <- roads("VA", "Fairfax city")
## Downloading: 16 kB     Downloading: 16 kB     Downloading: 52 kB     Downloading: 52 kB     Downloading: 52 kB     Downloading: 52 kB     Downloading: 52 kB     Downloading: 52 kB     Downloading: 52 kB     Downloading: 52 kB     Downloading: 68 kB     Downloading: 68 kB     Downloading: 68 kB     Downloading: 68 kB     Downloading: 84 kB     Downloading: 84 kB     Downloading: 84 kB     Downloading: 84 kB     Downloading: 84 kB     Downloading: 84 kB     Downloading: 100 kB     Downloading: 100 kB     Downloading: 100 kB     Downloading: 100 kB     Downloading: 100 kB     Downloading: 100 kB
fairfax.outline <- county_subdivisions("VA", "Fairfax city")
## Retrieving data for the year 2021
##   |                                                                              |                                                                      |   0%  |                                                                              |                                                                      |   1%  |                                                                              |=                                                                     |   1%  |                                                                              |=                                                                     |   2%  |                                                                              |==                                                                    |   2%  |                                                                              |==                                                                    |   3%  |                                                                              |===                                                                   |   4%  |                                                                              |====                                                                  |   5%  |                                                                              |=====                                                                 |   7%  |                                                                              |======                                                                |   9%  |                                                                              |=======                                                               |  10%  |                                                                              |========                                                              |  12%  |                                                                              |=========                                                             |  13%  |                                                                              |==========                                                            |  15%  |                                                                              |===========                                                           |  16%  |                                                                              |============                                                          |  18%  |                                                                              |=============                                                         |  19%  |                                                                              |==============                                                        |  20%  |                                                                              |===============                                                       |  22%  |                                                                              |================                                                      |  23%  |                                                                              |=================                                                     |  25%  |                                                                              |==================                                                    |  26%  |                                                                              |===================                                                   |  28%  |                                                                              |====================                                                  |  29%  |                                                                              |======================                                                |  31%  |                                                                              |=======================                                               |  32%  |                                                                              |========================                                              |  34%  |                                                                              |=========================                                             |  35%  |                                                                              |==========================                                            |  37%  |                                                                              |===========================                                           |  38%  |                                                                              |============================                                          |  39%  |                                                                              |============================                                          |  40%  |                                                                              |=============================                                         |  41%  |                                                                              |=============================                                         |  42%  |                                                                              |==============================                                        |  42%  |                                                                              |===============================                                       |  44%  |                                                                              |===============================                                       |  45%  |                                                                              |================================                                      |  46%  |                                                                              |=================================                                     |  47%  |                                                                              |=================================                                     |  48%  |                                                                              |==================================                                    |  49%  |                                                                              |===================================                                   |  49%  |                                                                              |====================================                                  |  51%  |                                                                              |=====================================                                 |  52%  |                                                                              |======================================                                |  54%  |                                                                              |=======================================                               |  55%  |                                                                              |========================================                              |  57%  |                                                                              |=========================================                             |  58%  |                                                                              |=========================================                             |  59%  |                                                                              |==========================================                            |  60%  |                                                                              |===========================================                           |  61%  |                                                                              |============================================                          |  63%  |                                                                              |=============================================                         |  64%  |                                                                              |==============================================                        |  66%  |                                                                              |===============================================                       |  67%  |                                                                              |================================================                      |  69%  |                                                                              |=================================================                     |  70%  |                                                                              |==================================================                    |  72%  |                                                                              |===================================================                   |  73%  |                                                                              |====================================================                  |  75%  |                                                                              |=====================================================                 |  76%  |                                                                              |======================================================                |  78%  |                                                                              |=======================================================               |  79%  |                                                                              |=========================================================             |  81%  |                                                                              |==========================================================            |  82%  |                                                                              |===========================================================           |  84%  |                                                                              |============================================================          |  85%  |                                                                              |=============================================================         |  87%  |                                                                              |==============================================================        |  88%  |                                                                              |===============================================================       |  90%  |                                                                              |================================================================      |  91%  |                                                                              |=================================================================     |  93%  |                                                                              |==================================================================    |  94%  |                                                                              |===================================================================   |  95%  |                                                                              |====================================================================  |  97%  |                                                                              |===================================================================== |  98%  |                                                                              |======================================================================| 100%

Make the hotspots by month:

ggplot() +
  geom_sf(data = fairfax.outline, color = "grey") +
  geom_hex(aes(x = lon, y = lat), data = calls.subset, bins = 6) +
  scale_fill_continuous(type = "viridis") +
  geom_sf(data = fairfax.roads, color = "black", alpha = 0.4) +
  theme_classic() +
  facet_wrap(~ MONTH)
## Warning: Removed 65 rows containing non-finite outside the scale range
## (`stat_binhex()`).

Or, make hotspots by month using only “recent” data (where recent is post-COVID or a similar change to your data):

ggplot() +
  geom_sf(data = fairfax.outline, color = "grey") +
  geom_hex(aes(x = lon, y = lat), data = subset(calls.subset, calls.subset$year > '2019'), 
           bins = 6) +
  scale_fill_continuous(type = "viridis") +
  geom_sf(data = fairfax.roads, color = "black", alpha = 0.4) +
  theme_classic() +
  facet_wrap(~ MONTH)
## Warning: Removed 17 rows containing non-finite outside the scale range
## (`stat_binhex()`).

Don’t include both sets of these maps - either use the full time range OR the post-COVID one. Not both.